Data Import

lowbirthweight <- read_csv("csv_NYC_lowbirthweight.csv")
## Rows: 87 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Region/County, 2018, 2019, 2020, Total
## dbl (1): Percentage
## num (1): Average births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pm2_5 <- read_csv("pm2.5.csv")
## New names:
## Rows: 62 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): State, County, Data Comment dbl (4): StateFIPS, CountyFIPS, Year, Value
## lgl (1): ...8
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...8`
edu_NY <- read_excel("Edu_NY.xlsx")
race_NY <- read_excel("Race_NY.xlsx")
## New names:
## • `` -> `...1`
HHincome_NY <- read_excel("HHincome_NY.xlsx")
## New names:
## • `` -> `...1`
Age_NY <- read_excel("Age_NY.xlsx")
## New names:
## • `` -> `...1`
Sex_NY <- read_excel("Sex_NY.xlsx")
## New names:
## • `` -> `...1`
health <- read_excel("chir_current_data.xlsx")
uscounties <- read_csv("uscounties.csv") #Simplemaps.com
## Rows: 3143 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): county, county_ascii, county_full, county_fips, state_id, state_name
## dbl (3): lat, lng, population
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Wrangling

PM2.5 Dataset
pm2_5 <- pm2_5 %>% 
  janitor::clean_names()%>%
  select (county,value) %>%
  rename (annual_pm2.5 = "value")

The dataset is obtained from EPH Tracking Website from CDC (https://ephtracking.cdc.gov/DataExplorer/). This has 62 rows and 2 columns of data. In which, 2 variables are:
- county: NY county name
- annual_pm2.5: annual estimated PM2.5 concentration at each county (ug/m^3)

Demographic Dataset

The following set of demographic datasets is obtained from Census Reporter webpage (https://censusreporter.org/).

Education Attainment
edu <- edu_NY %>% 
  janitor::clean_names()%>%
  mutate (percentage_high_education = (bach_male+master_male+prof_male+doct_male+bach_female+master_female+prof_female+doct_female)/total) %>%
  filter(str_detect (name, " County, NY")) %>% 
  mutate(county = str_replace (name, " County, NY", "")) %>%
  select(county,percentage_high_education) %>%
  mutate(county = str_replace (county, "St.", "St")) %>%
  mutate(county = str_replace (county, "Stuben", "Steuben"))%>%
  select(county,percentage_high_education)

This has 62 rows and 2 columns of data. In which, 2 variables are:
- county: NY county name
- percentage_high_education: percentage of population who finished a higher education level (higher than a bachelor degree)

Ethinicity
race_non_hisp_white <- race_NY %>% 
  janitor::clean_names()%>%
  filter( x1 == "White Non-Hispanic") %>% 
  pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "percent_non_hisp_white") %>% 
  select (county,percent_non_hisp_white) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,percent_non_hisp_white ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

race_non_hisp_black <- race_NY %>% 
  janitor::clean_names()%>%
  filter( x1 == "Black Non-Hispanic") %>% 
  pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "percent_non_hisp_black") %>% 
  select (county,percent_non_hisp_black) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,percent_non_hisp_black ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

race_hisp_white <- race_NY %>% 
  janitor::clean_names()%>%
  filter( x1 == "White Hispanic") %>% 
  pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "percent_hisp_white") %>% 
  select (county,percent_hisp_white) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,percent_hisp_white ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

race_hisp_black <- race_NY %>% 
  janitor::clean_names()%>%
  filter( x1 == "Black Hispanic") %>% 
  pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "percent_hisp_black") %>% 
  select (county,percent_hisp_black) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,percent_hisp_black ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

race_merge <- race_non_hisp_white %>%
  inner_join(race_non_hisp_black, by = "county") %>%
  inner_join(race_hisp_white, by = "county") %>%
  inner_join(race_hisp_black, by = "county")

race_merge <- race_merge %>% 
  mutate (percent_other = 1 - percent_non_hisp_white - percent_non_hisp_black - percent_hisp_white - percent_hisp_black)

This has 62 rows and 6 columns of data. In which, 6 variables are:
- county: NY county name
- percent_non_hisp_white: percentage of population who are identified as Non-Hispanic White
- percent_non_hisp_black: percentage of population who are identified as Non-Hispanic Black
- percent_hisp_white: percentage of population who are identified as Hispanic White
- percent_hisp_black: percentage of population who are identified as Hispanic Black
- percent_other: percentage of population who are identified as any other ethinic groups

Household Income
income <- HHincome_NY %>%
  janitor::clean_names() %>% 
  filter (x1 == "percent_high_income") %>%
   pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "percent_high_income") %>%
  select (county,percent_high_income) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,percent_high_income ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

This has 62 rows and 2 columns of data. In which, 2 variables are:
- county: NY county name
- percent_high_income: percentage of population who are at higher income households (>$75,000 annually)

Median Age
age <- Age_NY %>% 
  janitor::clean_names() %>% 
  pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "median_age") %>%
  select (county,median_age) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,median_age ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

This has 62 rows and 2 columns of data. In which, 2 variables are:
- county: NY county name
- median_age: median age of the population at each county

Sex
sex <- Sex_NY %>% 
  janitor::clean_names() %>% 
  filter (x1 == "Male:") %>%
  pivot_longer(
    albany_county_ny : yates_county_ny,
    names_to = "county",
    values_to = "percent_male") %>%
  select (county,percent_male) %>%
  separate(county, into = c("county", "x"), sep = "_county_") %>%
  select (county,percent_male ) %>%
  mutate(county = str_replace (county, "_", " ")) %>%
  mutate_at(vars(county), str_to_title)

This has 62 rows and 2 columns of data. In which, 2 variables are:
- county: NY county name
- percent_male: percentage of population who are identified as male

Low Birthweight Data
lowbirthweight <- lowbirthweight %>% 
  janitor::clean_names()%>%
  select (region_county, percentage)%>%
  rename (county = "region_county", percent_lowbirthweight = "percentage") 

This has 87 rows and 2 columns of data. In which, 2 variables are:
- county: NY county name
- percent_lowbirthweight: percentage of children being born being identified as low birth weight (<2,500g) (https://www.health.ny.gov/)

Health Indicator Dataset

The following dataset is obtained from New York State Department of Health (https://www.health.ny.gov/) that contains 4 different health indicators that will complement with the low birthweight. These 5 will act as our outcomes of interest for our regression models.

health <- health %>%
  janitor::clean_names()%>%
  select (geographic_area,indicator_title,topic_area,rate_percent,measurement) %>%
  filter( str_detect (geographic_area, " County")) %>% 
  mutate (county = str_replace (geographic_area, " County", "")) %>%
  select (county, everything()) %>%
  select (-geographic_area) %>%
  filter(topic_area == "Cancer Indicators" | topic_area == "Respiratory Disease Indicators" | topic_area == "Cardiovascular Disease Indicators" | topic_area == "Maternal and Infant Health Indicators")
  
cancer <- health %>% 
  filter (topic_area == "Cancer Indicators") %>% 
  filter (indicator_title == "All cancer incidence rate per 100,000") %>% 
  select (-c(indicator_title, topic_area, measurement)) %>% 
  rename (cancer_mortality_per_100k = "rate_percent")

resp <- health %>%
  filter (topic_area == "Respiratory Disease Indicators") %>%
  filter (indicator_title == "Asthma hospitalization rate per 10,000") %>% 
  select (-c(indicator_title, topic_area, measurement)) %>% 
  rename (asthma_hosp_rate_per_10k = "rate_percent")

cardio <- health %>%
  filter (topic_area == "Cardiovascular Disease Indicators") %>%
  filter (indicator_title == "Cardiovascular disease hospitalization rate per 10,000") %>% 
  select (-c(indicator_title, topic_area, measurement)) %>% 
  rename (cardio_hosp_rate_per_10k = "rate_percent")

maternal <- health %>%
  filter (topic_area == "Maternal and Infant Health Indicators") %>%
  filter (indicator_title == "Percentage of premature births with <37 weeks gestation") %>% 
  select (-c(indicator_title, topic_area, measurement)) %>% 
  rename (premature_percentage = "rate_percent")

They are:
- cancer_mortality_per_100k: percentage of cancer mortality per 100 thousands people in each NY county (Cancer Indicator)
- asthma_hosp_rate_per_10k: percentage of asthma hospitalization per 10 thousands people in each NY county (Respiratory Disease Indicator)
- cardio_hosp_rate_per_10k“: percentage of cardiovascular-disease-related hospitalization per 10 thousands people in each NY county (Cardiovascular Disease Indicator)
- premature_percentage: percentage of children being born prematurely (<37 gestational weeks) in each NY county

Merge dataset:

Here we perform inner_join() to create 2 bigger datasets health_merge and demographic_merge. Then, we join them with our lowbirthweight & pm2_5 tp make a finalized data frame called merge. And, we will use this for regression model.

health_merge <- maternal %>% 
  inner_join(cancer, by = "county") %>%
  inner_join(resp, by = "county") %>% 
  inner_join(cardio, by = "county") 

demographic_merge <- age %>%
  inner_join(sex,  by = "county") %>%
  inner_join(income, by = "county") %>% 
  inner_join(race_merge, by = "county") %>% 
  inner_join(edu, by = "county") %>%
  mutate(county = str_replace (county, "St ", "St. "))
  

merge <- lowbirthweight %>% 
  inner_join(pm2_5, by = "county") %>%
  inner_join(health_merge, by = "county") %>% 
  inner_join(demographic_merge, by = "county") 

merge <- merge %>% 
  select (county, annual_pm2.5,everything())%>%
  mutate(asthma_hosp_rate_per_10k = as.numeric(asthma_hosp_rate_per_10k))%>%
  mutate(cardio_hosp_rate_per_10k = as.numeric(cardio_hosp_rate_per_10k)) %>%
  mutate(premature_percentage = as.numeric(premature_percentage))%>%
  mutate(cancer_mortality_per_100k = as.numeric(cancer_mortality_per_100k))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `asthma_hosp_rate_per_10k =
##   as.numeric(asthma_hosp_rate_per_10k)`.
## Caused by warning:
## ! NAs introduced by coercion
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `premature_percentage = as.numeric(premature_percentage)`.
## Caused by warning:
## ! NAs introduced by coercion
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `cancer_mortality_per_100k =
##   as.numeric(cancer_mortality_per_100k)`.
## Caused by warning:
## ! NAs introduced by coercion
Making Map File
uscounties <- uscounties %>% 
  filter (state_id == "NY") %>%
  select (county, lat, lng)

map <- merge %>% 
  inner_join(uscounties, by ="county")

write.csv(map, "NY_map.csv", row.names = FALSE)

Exploratory Analysis

Percentage of High Income
merge %>%
 plot_ly(
    x = ~reorder(county, percent_high_income),
    y = ~percent_high_income,
    type = "bar",
    marker = list(color = "red1")
  ) %>%
  layout(
    title = "Percentage of High Income",
    xaxis = list(title = "County Name", categoryorder = "total descending"),
    yaxis = list(title = "Percentage"),
    barmode = "stack"
  )
Racial Composition
race_plot <- merge %>% 
  select (county, percent_non_hisp_white, percent_non_hisp_black, percent_hisp_white, percent_hisp_black, percent_other) %>%
  pivot_longer(
    cols = starts_with("percent_"), names_to = "race", values_to = "percentage") 

race_plot%>%
  plot_ly(x = ~county, y = ~percentage, type = "bar",color = ~race,colors = "RdYlGn", hoverinfo = "y+name") %>% 
  layout(barmode = "stack",
         title = "Racial Composition in NY county",
         xaxis = list(title = "County"),
         yaxis = list(title = "Percentage (%)"))
Outcome Graphs
Low birthweight Rate
merge %>%
  drop_na() %>%
  plot_ly(
    x = ~county,
    y = ~percent_lowbirthweight,
    color = ~county,
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "Rate of Low Birthweight",
    xaxis = list(title = "County", tickangle = 90),
    yaxis = list(title = "Percentage of Low Birthweight"),
    showlegend = FALSE
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
Premature Birth
merge %>%
  drop_na() %>%
  plot_ly(
    x = ~county,
    y = ~premature_percentage,
    color = ~county,
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "Rate of Premature Birth",
    xaxis = list(title = "County", tickangle = 90),
    yaxis = list(title = "Rate of Premature Birth"),
    showlegend = FALSE
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
Asthma Hospitalization
merge %>%
  drop_na() %>%
  plot_ly(
    x = ~county,
    y = ~asthma_hosp_rate_per_10k,
    color = ~county,
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "Rate of Asthma Hospitalization",
    xaxis = list(title = "County", tickangle = 90),
    yaxis = list(title = "Rate per 10k"),
    showlegend = FALSE
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
Cancer Mortality Rate
merge %>%
  drop_na() %>%
  plot_ly(
    x = ~county,
    y = ~cancer_mortality_per_100k,
    color = ~county,
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "Rate of Cancer Mortality",
    xaxis = list(title = "County", tickangle = 90),
    yaxis = list(title = "Rate per 100k"),
    showlegend = FALSE
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
Cardiovascular Rate
merge %>%
  drop_na() %>%
  plot_ly(
    x = ~county,
    y = ~cardio_hosp_rate_per_10k,
    color = ~county,
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(
    title = "Rate of Cardiovascular Disease Hospitalization",
    xaxis = list(title = "County", tickangle = 90),
    yaxis = list(title = "Rate per 10k"),
    showlegend = FALSE
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

Regression Model

Linear regression pm2.5 concentration and percentage of low birth weight
lm_pm2.5_birthweight <- lm(percent_lowbirthweight~annual_pm2.5 , data=merge)
summary(lm_pm2.5_birthweight)
## 
## Call:
## lm(formula = percent_lowbirthweight ~ annual_pm2.5, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.7519 -0.5694  0.1806  0.9024  2.4051 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    4.0310     1.2493   3.227  0.00203 **
## annual_pm2.5   0.4535     0.1766   2.567  0.01276 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.454 on 60 degrees of freedom
## Multiple R-squared:  0.09898,    Adjusted R-squared:  0.08396 
## F-statistic: 6.591 on 1 and 60 DF,  p-value: 0.01276
lm_pm2.5_birthweight_adjusted <-lm(percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percent_hisp_black + percentage_high_education + percent_male, data=merge)
summary(lm_pm2.5_birthweight_adjusted)
## 
## Call:
## lm(formula = percent_lowbirthweight ~ annual_pm2.5 + median_age + 
##     percent_high_income + percent_non_hisp_white + percent_non_hisp_black + 
##     percent_hisp_white + percent_hisp_black + percentage_high_education + 
##     percent_male, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9906 -0.5299  0.2656  0.7311  2.0472 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                 5.26648   10.33015   0.510    0.612  
## annual_pm2.5                0.26854    0.22070   1.217    0.229  
## median_age                 -0.10337    0.05587  -1.850    0.070 .
## percent_high_income        -3.98469    4.15411  -0.959    0.342  
## percent_non_hisp_white      3.04094    4.86581   0.625    0.535  
## percent_non_hisp_black     12.77635    9.34871   1.367    0.178  
## percent_hisp_white         17.09556   17.91459   0.954    0.344  
## percent_hisp_black        -15.49031   43.02226  -0.360    0.720  
## percentage_high_education   0.11608    3.51768   0.033    0.974  
## percent_male                5.00262   15.93902   0.314    0.755  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.362 on 52 degrees of freedom
## Multiple R-squared:  0.3148, Adjusted R-squared:  0.1962 
## F-statistic: 2.654 on 9 and 52 DF,  p-value: 0.01303
stepwise
step(lm_pm2.5_birthweight_adjusted, direction = 'both')
## Start:  AIC=47.43
## percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percentage_high_education + percent_male
## 
##                             Df Sum of Sq     RSS    AIC
## - percentage_high_education  1    0.0020  96.507 45.434
## - percent_male               1    0.1828  96.688 45.550
## - percent_hisp_black         1    0.2406  96.746 45.587
## - percent_non_hisp_white     1    0.7249  97.230 45.897
## - percent_hisp_white         1    1.6901  98.195 46.509
## - percent_high_income        1    1.7076  98.213 46.520
## - annual_pm2.5               1    2.7476  99.253 47.173
## <none>                                    96.505 47.433
## - percent_non_hisp_black     1    3.4662  99.972 47.621
## - median_age                 1    6.3530 102.858 49.386
## 
## Step:  AIC=45.43
## percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percent_male
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_male               1    0.1910  96.698 43.557
## - percent_hisp_black         1    0.2405  96.748 43.588
## - percent_non_hisp_white     1    0.7548  97.262 43.917
## - percent_hisp_white         1    1.9357  98.443 44.665
## - annual_pm2.5               1    2.7457  99.253 45.173
## <none>                                    96.507 45.434
## - percent_high_income        1    3.3754  99.883 45.566
## - percent_non_hisp_black     1    3.5113 100.019 45.650
## + percentage_high_education  1    0.0020  96.505 47.433
## - median_age                 1    7.4111 103.918 48.021
## 
## Step:  AIC=43.56
## percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_hisp_black         1    0.3373  97.036 41.773
## - percent_non_hisp_white     1    0.7998  97.498 42.067
## - percent_hisp_white         1    2.1084  98.807 42.894
## - annual_pm2.5               1    2.7154  99.414 43.274
## <none>                                    96.698 43.557
## - percent_non_hisp_black     1    3.4767 100.175 43.747
## - percent_high_income        1    3.9879 100.686 44.062
## + percent_male               1    0.1910  96.507 45.434
## + percentage_high_education  1    0.0102  96.688 45.550
## - median_age                 1    7.2653 103.964 46.048
## 
## Step:  AIC=41.77
## percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_non_hisp_white     1    1.2535  98.289 40.568
## - percent_hisp_white         1    1.9333  98.969 40.996
## - annual_pm2.5               1    2.7548  99.791 41.508
## <none>                                    97.036 41.773
## - percent_non_hisp_black     1    3.3156 100.351 41.856
## - percent_high_income        1    3.7372 100.773 42.116
## + percent_hisp_black         1    0.3373  96.698 43.557
## + percent_male               1    0.2879  96.748 43.588
## + percentage_high_education  1    0.0191  97.017 43.760
## - median_age                 1    7.4932 104.529 44.384
## 
## Step:  AIC=40.57
## percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_black + percent_hisp_white
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_hisp_white         1    0.6890  98.978 39.001
## - annual_pm2.5               1    1.9873 100.277 39.809
## - percent_non_hisp_black     1    2.9014 101.191 40.372
## - percent_high_income        1    3.1909 101.480 40.549
## <none>                                    98.289 40.568
## + percent_non_hisp_white     1    1.2535  97.036 41.773
## + percent_hisp_black         1    0.7910  97.498 42.067
## + percent_male               1    0.4365  97.853 42.292
## + percentage_high_education  1    0.2070  98.082 42.438
## - median_age                 1    7.2274 105.517 42.968
## 
## Step:  AIC=39
## percent_lowbirthweight ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_black
## 
##                             Df Sum of Sq     RSS    AIC
## - annual_pm2.5               1    2.1570 101.135 38.338
## - percent_high_income        1    2.5040 101.482 38.550
## <none>                                    98.978 39.001
## + percent_hisp_white         1    0.6890  98.289 40.568
## + percentage_high_education  1    0.4551  98.523 40.716
## + percent_male               1    0.4461  98.532 40.721
## + percent_hisp_black         1    0.1537  98.824 40.905
## - percent_non_hisp_black     1    6.5238 105.502 40.959
## + percent_non_hisp_white     1    0.0091  98.969 40.996
## - median_age                 1    7.6698 106.648 41.629
## 
## Step:  AIC=38.34
## percent_lowbirthweight ~ median_age + percent_high_income + percent_non_hisp_black
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_high_income        1    1.1669 102.302 37.049
## <none>                                   101.135 38.338
## + annual_pm2.5               1    2.1570  98.978 39.001
## + percent_hisp_white         1    0.8587 100.277 39.809
## + percentage_high_education  1    0.3690 100.766 40.111
## + percent_male               1    0.3526 100.783 40.122
## + percent_hisp_black         1    0.0682 101.067 40.296
## + percent_non_hisp_white     1    0.0581 101.077 40.302
## - median_age                 1    8.6493 109.785 41.426
## - percent_non_hisp_black     1   11.1390 112.274 42.816
## 
## Step:  AIC=37.05
## percent_lowbirthweight ~ median_age + percent_non_hisp_black
## 
##                             Df Sum of Sq    RSS    AIC
## <none>                                   102.30 37.049
## + percentage_high_education  1    1.4733 100.83 38.150
## + percent_high_income        1    1.1669 101.14 38.338
## + annual_pm2.5               1    0.8200 101.48 38.550
## + percent_male               1    0.6572 101.64 38.650
## + percent_non_hisp_white     1    0.0433 102.26 39.023
## + percent_hisp_white         1    0.0400 102.26 39.025
## + percent_hisp_black         1    0.0005 102.30 39.049
## - median_age                 1    9.2828 111.58 40.434
## - percent_non_hisp_black     1   10.0049 112.31 40.834
## 
## Call:
## lm(formula = percent_lowbirthweight ~ median_age + percent_non_hisp_black, 
##     data = merge)
## 
## Coefficients:
##            (Intercept)              median_age  percent_non_hisp_black  
##                11.5269                 -0.1142                  8.0498
lm_pm2.5_birthweight_adjusted_best <- lm(percent_lowbirthweight~median_age + percent_non_hisp_black, data=merge)

summary (lm_pm2.5_birthweight_adjusted_best) %>%
  tab_model()
  percent lowbirthweight
Predictors Estimates CI p
(Intercept) 11.53 7.18 – 15.88 <0.001
median age -0.11 -0.21 – -0.02 0.024
percent non hisp black 8.05 1.34 – 14.76 0.019
Observations 62
R2 / R2 adjusted 0.274 / 0.249
Linear regression pm2.5 concentration and premature birth
lm_pm2.5_premature <- lm(premature_percentage ~ annual_pm2.5 , data=merge)
summary(lm_pm2.5_premature)
## 
## Call:
## lm(formula = premature_percentage ~ annual_pm2.5, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3611 -0.3666  0.0279  0.6960  2.1960 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.9789     0.9329   7.481 4.15e-10 ***
## annual_pm2.5   0.2637     0.1316   2.004   0.0497 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.075 on 59 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06372,    Adjusted R-squared:  0.04785 
## F-statistic: 4.015 on 1 and 59 DF,  p-value: 0.04969
lm_pm2.5_premature_adjusted <-lm(premature_percentage ~ annual_pm2.5 + median_age + percent_high_income + percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percent_hisp_black + percentage_high_education + percent_male, data=merge)
summary(lm_pm2.5_premature_adjusted)
## 
## Call:
## lm(formula = premature_percentage ~ annual_pm2.5 + median_age + 
##     percent_high_income + percent_non_hisp_white + percent_non_hisp_black + 
##     percent_hisp_white + percent_hisp_black + percentage_high_education + 
##     percent_male, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4709 -0.3881  0.0150  0.5947  1.9423 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)  
## (Intercept)               -4.35484    8.04591  -0.541   0.5907  
## annual_pm2.5               0.28875    0.17252   1.674   0.1003  
## median_age                 0.07759    0.04906   1.581   0.1200  
## percent_high_income       -1.67757    3.23574  -0.518   0.6064  
## percent_non_hisp_white     4.08346    3.82446   1.068   0.2907  
## percent_non_hisp_black    15.18062    7.28782   2.083   0.0423 *
## percent_hisp_white        13.74174   14.02625   0.980   0.3319  
## percent_hisp_black        -8.02314   33.51172  -0.239   0.8117  
## percentage_high_education -0.72087    2.74036  -0.263   0.7936  
## percent_male               8.73135   12.42414   0.703   0.4854  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.061 on 51 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2123, Adjusted R-squared:  0.07328 
## F-statistic: 1.527 on 9 and 51 DF,  p-value: 0.1638
stepwise
step(lm_pm2.5_premature_adjusted, direction = 'both')
## Start:  AIC=16.3
## premature_percentage ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percentage_high_education + percent_male
## 
##                             Df Sum of Sq    RSS    AIC
## - percent_hisp_black         1    0.0645 57.475 14.369
## - percentage_high_education  1    0.0779 57.488 14.383
## - percent_high_income        1    0.3026 57.713 14.621
## - percent_male               1    0.5560 57.966 14.888
## - percent_hisp_white         1    1.0805 58.491 15.438
## - percent_non_hisp_white     1    1.2833 58.694 15.649
## <none>                                   57.410 16.300
## - median_age                 1    2.8154 60.226 17.221
## - annual_pm2.5               1    3.1536 60.564 17.562
## - percent_non_hisp_black     1    4.8843 62.295 19.281
## 
## Step:  AIC=14.37
## premature_percentage ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percentage_high_education + percent_male
## 
##                             Df Sum of Sq    RSS    AIC
## - percentage_high_education  1    0.0782 57.553 12.452
## - percent_high_income        1    0.2455 57.720 12.629
## - percent_male               1    0.6430 58.118 13.048
## - percent_hisp_white         1    1.0302 58.505 13.453
## - percent_non_hisp_white     1    1.5623 59.037 14.005
## <none>                                   57.475 14.369
## - median_age                 1    2.7685 60.243 15.239
## - annual_pm2.5               1    3.1781 60.653 15.652
## + percent_hisp_black         1    0.0645 57.410 16.300
## - percent_non_hisp_black     1    4.8257 62.301 17.287
## 
## Step:  AIC=12.45
## premature_percentage ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_male
## 
##                             Df Sum of Sq    RSS    AIC
## - percent_male               1    0.9031 58.456 11.402
## - percent_high_income        1    1.1751 58.728 11.685
## - percent_hisp_white         1    1.4917 59.045 12.013
## - percent_non_hisp_white     1    1.8899 59.443 12.423
## <none>                                   57.553 12.452
## - annual_pm2.5               1    3.2023 60.755 13.755
## - median_age                 1    3.3963 60.949 13.950
## + percentage_high_education  1    0.0782 57.475 14.369
## + percent_hisp_black         1    0.0649 57.488 14.383
## - percent_non_hisp_black     1    5.1011 62.654 15.632
## 
## Step:  AIC=11.4
## premature_percentage ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white
## 
##                             Df Sum of Sq    RSS    AIC
## - percent_high_income        1    1.6111 60.067 11.060
## - percent_hisp_white         1    1.7178 60.174 11.168
## <none>                                   58.456 11.402
## - percent_non_hisp_white     1    2.2100 60.666 11.665
## + percent_male               1    0.9031 57.553 12.452
## - annual_pm2.5               1    3.1340 61.590 12.588
## + percentage_high_education  1    0.3384 58.118 13.048
## - median_age                 1    3.6934 62.150 13.139
## + percent_hisp_black         1    0.1812 58.275 13.212
## - percent_non_hisp_black     1    4.9412 63.398 14.352
## 
## Step:  AIC=11.06
## premature_percentage ~ annual_pm2.5 + median_age + percent_non_hisp_white + 
##     percent_non_hisp_black + percent_hisp_white
## 
##                             Df Sum of Sq    RSS     AIC
## - percent_hisp_white         1    0.7258 60.793  9.7929
## - percent_non_hisp_white     1    1.7200 61.787 10.7824
## <none>                                   60.067 11.0603
## - annual_pm2.5               1    2.0150 62.083 11.0730
## + percentage_high_education  1    1.8034 58.264 11.2008
## + percent_high_income        1    1.6111 58.456 11.4018
## + percent_male               1    1.3392 58.728 11.6850
## - median_age                 1    3.2009 63.268 12.2272
## + percent_hisp_black         1    0.0234 60.044 13.0365
## - percent_non_hisp_black     1    4.7825 64.850 13.7334
## 
## Step:  AIC=9.79
## premature_percentage ~ annual_pm2.5 + median_age + percent_non_hisp_white + 
##     percent_non_hisp_black
## 
##                             Df Sum of Sq    RSS     AIC
## - percent_non_hisp_white     1    1.0724 61.866  8.8596
## - annual_pm2.5               1    1.9476 62.741  9.7165
## <none>                                   60.793  9.7929
## + percentage_high_education  1    1.6925 59.101 10.0706
## + percent_male               1    1.3682 59.425 10.4044
## + percent_hisp_white         1    0.7258 60.067 11.0603
## + percent_high_income        1    0.6191 60.174 11.1685
## - median_age                 1    3.5247 64.318 11.2309
## + percent_hisp_black         1    0.0078 60.785 11.7851
## - percent_non_hisp_black     1    4.1729 64.966 11.8426
## 
## Step:  AIC=8.86
## premature_percentage ~ annual_pm2.5 + median_age + percent_non_hisp_black
## 
##                             Df Sum of Sq    RSS     AIC
## - annual_pm2.5               1    1.2076 63.073  8.0388
## + percentage_high_education  1    2.1665 59.699  8.6851
## <none>                                   61.866  8.8596
## + percent_male               1    1.6360 60.230  9.2248
## + percent_high_income        1    1.1257 60.740  9.7395
## + percent_non_hisp_white     1    1.0724 60.793  9.7929
## - median_age                 1    3.9005 65.766 10.5891
## + percent_hisp_black         1    0.1279 61.738 10.7333
## + percent_hisp_white         1    0.0782 61.787 10.7824
## - percent_non_hisp_black     1    5.2522 67.118 11.8302
## 
## Step:  AIC=8.04
## premature_percentage ~ median_age + percent_non_hisp_black
## 
##                             Df Sum of Sq    RSS     AIC
## <none>                                   63.073  8.0388
## + percent_male               1    1.2567 61.817  8.8111
## + annual_pm2.5               1    1.2076 61.866  8.8596
## + percentage_high_education  1    1.0446 62.029  9.0201
## - median_age                 1    3.7908 66.864  9.5990
## + percent_non_hisp_white     1    0.3323 62.741  9.7165
## + percent_high_income        1    0.2774 62.796  9.7699
## + percent_hisp_black         1    0.1335 62.940  9.9095
## + percent_hisp_white         1    0.0007 63.073 10.0381
## - percent_non_hisp_black     1    9.6435 72.717 14.7176
## 
## Call:
## lm(formula = premature_percentage ~ median_age + percent_non_hisp_black, 
##     data = merge)
## 
## Coefficients:
##            (Intercept)              median_age  percent_non_hisp_black  
##                 4.8783                  0.0838                  8.0274
lm_pm2.5_premature_adjusted_best <- lm(premature_percentage ~ median_age + percent_non_hisp_black, data = merge)
summary(lm_pm2.5_premature_adjusted_best)%>%
  tab_model()
  premature percentage
Predictors Estimates CI p
(Intercept) 4.88 0.96 – 8.80 0.016
median age 0.08 -0.01 – 0.17 0.067
percent non hisp black 8.03 2.63 – 13.42 0.004
Observations 61
R2 / R2 adjusted 0.135 / 0.105
Linear regression pm2.5 concentration and cancer mortality
lm_pm2.5_cancer <- lm(cancer_mortality_per_100k ~ annual_pm2.5 , data=merge)
summary(lm_pm2.5_cancer)
## 
## Call:
## lm(formula = cancer_mortality_per_100k ~ annual_pm2.5, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -206.55  -34.46    1.63   45.56  154.59 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   871.126     65.539  13.292  < 2e-16 ***
## annual_pm2.5  -27.255      9.245  -2.948  0.00458 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 75.55 on 59 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1284, Adjusted R-squared:  0.1136 
## F-statistic: 8.691 on 1 and 59 DF,  p-value: 0.004576
lm_pm2.5_cancer_adjusted <-lm(cancer_mortality_per_100k ~annual_pm2.5 + median_age + percent_high_income + percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percent_hisp_black + percentage_high_education + percent_male, data=merge)
summary(lm_pm2.5_cancer_adjusted)
## 
## Call:
## lm(formula = cancer_mortality_per_100k ~ annual_pm2.5 + median_age + 
##     percent_high_income + percent_non_hisp_white + percent_non_hisp_black + 
##     percent_hisp_white + percent_hisp_black + percentage_high_education + 
##     percent_male, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -82.144 -23.468   3.255  24.875  71.915 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  55.336    287.601   0.192  0.84819    
## annual_pm2.5                 13.203      6.167   2.141  0.03707 *  
## median_age                   12.889      1.754   7.349 1.53e-09 ***
## percent_high_income        -175.488    115.661  -1.517  0.13538    
## percent_non_hisp_white      448.039    136.705   3.277  0.00189 ** 
## percent_non_hisp_black      519.597    260.503   1.995  0.05145 .  
## percent_hisp_white          935.183    501.368   1.865  0.06790 .  
## percent_hisp_black        -1786.174   1197.876  -1.491  0.14209    
## percentage_high_education   -44.671     97.954  -0.456  0.65029    
## percent_male               -640.838    444.101  -1.443  0.15513    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.92 on 51 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8101, Adjusted R-squared:  0.7766 
## F-statistic: 24.18 on 9 and 51 DF,  p-value: 1.897e-15
stepwise
step(lm_pm2.5_cancer_adjusted, direction = 'both')
## Start:  AIC=452.62
## cancer_mortality_per_100k ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percentage_high_education + percent_male
## 
##                             Df Sum of Sq    RSS    AIC
## - percentage_high_education  1       299  73653 450.87
## <none>                                    73354 452.62
## - percent_male               1      2995  76348 453.06
## - percent_hisp_black         1      3198  76552 453.23
## - percent_high_income        1      3311  76665 453.32
## - percent_hisp_white         1      5004  78358 454.65
## - percent_non_hisp_black     1      5722  79076 455.20
## - annual_pm2.5               1      6593  79947 455.87
## - percent_non_hisp_white     1     15449  88803 462.28
## - median_age                 1     77687 151040 494.68
## 
## Step:  AIC=450.87
## cancer_mortality_per_100k ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percent_male
## 
##                             Df Sum of Sq    RSS    AIC
## <none>                                    73653 450.87
## - percent_male               1      2696  76349 451.06
## - percent_hisp_black         1      3203  76855 451.47
## + percentage_high_education  1       299  73354 452.62
## - percent_non_hisp_black     1      6218  79871 453.81
## - annual_pm2.5               1      6660  80313 454.15
## - percent_hisp_white         1      7058  80711 454.45
## - percent_high_income        1     10188  83840 456.77
## - percent_non_hisp_white     1     17739  91392 462.03
## - median_age                 1     89363 163016 497.33
## 
## Call:
## lm(formula = cancer_mortality_per_100k ~ annual_pm2.5 + median_age + 
##     percent_high_income + percent_non_hisp_white + percent_non_hisp_black + 
##     percent_hisp_white + percent_hisp_black + percent_male, data = merge)
## 
## Coefficients:
##            (Intercept)            annual_pm2.5              median_age  
##                 -1.845                  13.266                  13.138  
##    percent_high_income  percent_non_hisp_white  percent_non_hisp_black  
##               -213.487                 464.030                 536.278  
##     percent_hisp_white      percent_hisp_black            percent_male  
##               1023.820               -1787.433                -574.147
lm_pm2.5_cancer_adjusted_best <- lm(cancer_mortality_per_100k ~ annual_pm2.5 + median_age + percent_high_income + percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percent_hisp_black + percent_male, data =merge)
summary (lm_pm2.5_cancer_adjusted_best)%>%
  tab_model()
  cancer mortality per 100
k
Predictors Estimates CI p
(Intercept) -1.85 -517.25 – 513.56 0.994
annual pm2 5 13.27 0.99 – 25.54 0.035
median age 13.14 9.82 – 16.46 <0.001
percent high income -213.49 -373.22 – -53.75 0.010
percent non hisp white 464.03 200.92 – 727.14 0.001
percent non hisp black 536.28 22.68 – 1049.88 0.041
percent hisp white 1023.82 103.51 – 1944.13 0.030
percent hisp black -1787.43 -4172.76 – 597.90 0.139
percent male -574.15 -1409.17 – 260.87 0.174
Observations 61
R2 / R2 adjusted 0.809 / 0.780
Linear regression pm2.5 concentration and asthma hospitalization
lm_pm2.5_asthma <- lm(asthma_hosp_rate_per_10k ~ annual_pm2.5 , data=merge)
summary(lm_pm2.5_asthma)
## 
## Call:
## lm(formula = asthma_hosp_rate_per_10k ~ annual_pm2.5, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3861 -1.4748 -0.7693  0.6846 21.8951 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -7.630      3.376  -2.260 0.027646 *  
## annual_pm2.5    1.703      0.472   3.608 0.000651 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.415 on 57 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.1859, Adjusted R-squared:  0.1717 
## F-statistic: 13.02 on 1 and 57 DF,  p-value: 0.0006507
lm_pm2.5_asthma_adjusted <-lm(asthma_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_high_income + percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percent_hisp_black + percentage_high_education + percent_male, data=merge)
summary(lm_pm2.5_asthma_adjusted)
## 
## Call:
## lm(formula = asthma_hosp_rate_per_10k ~ annual_pm2.5 + median_age + 
##     percent_high_income + percent_non_hisp_white + percent_non_hisp_black + 
##     percent_hisp_white + percent_hisp_black + percentage_high_education + 
##     percent_male, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7456 -0.6676 -0.1249  0.7388  2.7314 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.13088    7.84811   0.526   0.6010    
## annual_pm2.5               -0.08611    0.20011  -0.430   0.6689    
## median_age                  0.06249    0.05082   1.230   0.2247    
## percent_high_income         3.53198    3.08690   1.144   0.2581    
## percent_non_hisp_white      1.08237    3.67392   0.295   0.7695    
## percent_non_hisp_black     16.53403    6.95374   2.378   0.0214 *  
## percent_hisp_white          5.65133   13.45394   0.420   0.6763    
## percent_hisp_black        362.48923   32.21071  11.254 3.46e-15 ***
## percentage_high_education  -4.60085    2.73077  -1.685   0.0984 .  
## percent_male              -11.30604   12.41573  -0.911   0.3670    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.011 on 49 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:  0.9386, Adjusted R-squared:  0.9273 
## F-statistic: 83.25 on 9 and 49 DF,  p-value: < 2.2e-16
stepwise
step(lm_pm2.5_asthma_adjusted, direction = 'both')
## Start:  AIC=10.38
## asthma_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percentage_high_education + percent_male
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_non_hisp_white     1     0.089  50.215  8.488
## - percent_hisp_white         1     0.180  50.307  8.596
## - annual_pm2.5               1     0.189  50.316  8.606
## - percent_male               1     0.848  50.975  9.374
## - percent_high_income        1     1.339  51.466  9.939
## - median_age                 1     1.547  51.673 10.176
## <none>                                    50.126 10.383
## - percentage_high_education  1     2.904  53.030 11.706
## - percent_non_hisp_black     1     5.783  55.910 14.826
## - percent_hisp_black         1   129.556 179.683 83.706
## 
## Step:  AIC=8.49
## asthma_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_black + percent_hisp_white + percent_hisp_black + 
##     percentage_high_education + percent_male
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_hisp_white         1     0.092  50.307  6.596
## - annual_pm2.5               1     0.295  50.510  6.833
## - percent_male               1     0.850  51.065  7.479
## - percent_high_income        1     1.531  51.746  8.260
## - median_age                 1     1.609  51.824  8.349
## <none>                                    50.215  8.488
## - percentage_high_education  1     3.267  53.482 10.206
## + percent_non_hisp_white     1     0.089  50.126 10.383
## - percent_non_hisp_black     1    12.432  62.647 19.539
## - percent_hisp_black         1   136.612 186.827 84.006
## 
## Step:  AIC=6.6
## asthma_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_black + percent_hisp_black + percentage_high_education + 
##     percent_male
## 
##                             Df Sum of Sq     RSS    AIC
## - annual_pm2.5               1     0.285  50.592  4.929
## - percent_male               1     0.851  51.158  5.586
## - median_age                 1     1.530  51.837  6.363
## <none>                                    50.307  6.596
## - percent_high_income        1     3.275  53.582  8.317
## + percent_hisp_white         1     0.092  50.215  8.488
## + percent_non_hisp_white     1     0.000  50.307  8.596
## - percentage_high_education  1     3.894  54.201  8.994
## - percent_non_hisp_black     1    13.111  63.418 18.260
## - percent_hisp_black         1   181.546 231.853 94.745
## 
## Step:  AIC=4.93
## asthma_hosp_rate_per_10k ~ median_age + percent_high_income + 
##     percent_non_hisp_black + percent_hisp_black + percentage_high_education + 
##     percent_male
## 
##                             Df Sum of Sq     RSS    AIC
## - percent_male               1     1.030  51.622  4.118
## - median_age                 1     1.349  51.941  4.482
## <none>                                    50.592  4.929
## - percent_high_income        1     3.039  53.631  6.371
## + annual_pm2.5               1     0.285  50.307  6.596
## + percent_hisp_white         1     0.082  50.510  6.833
## + percent_non_hisp_white     1     0.012  50.580  6.915
## - percentage_high_education  1     4.599  55.191  8.063
## - percent_non_hisp_black     1    12.967  63.559 16.392
## - percent_hisp_black         1   184.303 234.895 93.515
## 
## Step:  AIC=4.12
## asthma_hosp_rate_per_10k ~ median_age + percent_high_income + 
##     percent_non_hisp_black + percent_hisp_black + percentage_high_education
## 
##                             Df Sum of Sq     RSS    AIC
## - median_age                 1     1.616  53.238  3.936
## <none>                                    51.622  4.118
## + percent_male               1     1.030  50.592  4.929
## - percent_high_income        1     2.616  54.238  5.035
## + annual_pm2.5               1     0.463  51.158  5.586
## + percent_hisp_white         1     0.081  51.541  6.025
## + percent_non_hisp_white     1     0.022  51.600  6.093
## - percentage_high_education  1     3.605  55.226  6.100
## - percent_non_hisp_black     1    13.597  65.219 15.913
## - percent_hisp_black         1   199.425 251.047 95.438
## 
## Step:  AIC=3.94
## asthma_hosp_rate_per_10k ~ percent_high_income + percent_non_hisp_black + 
##     percent_hisp_black + percentage_high_education
## 
##                             Df Sum of Sq     RSS    AIC
## <none>                                    53.238  3.936
## + median_age                 1     1.616  51.622  4.118
## + percent_male               1     1.296  51.941  4.482
## + annual_pm2.5               1     0.221  53.016  5.691
## + percent_non_hisp_white     1     0.092  53.146  5.835
## + percent_hisp_white         1     0.008  53.229  5.927
## - percent_high_income        1     4.568  57.805  6.793
## - percentage_high_education  1     6.197  59.434  8.433
## - percent_non_hisp_black     1    12.074  65.311 13.996
## - percent_hisp_black         1   198.347 251.584 93.564
## 
## Call:
## lm(formula = asthma_hosp_rate_per_10k ~ percent_high_income + 
##     percent_non_hisp_black + percent_hisp_black + percentage_high_education, 
##     data = merge)
## 
## Coefficients:
##               (Intercept)        percent_high_income  
##                     1.362                      4.653  
##    percent_non_hisp_black         percent_hisp_black  
##                    13.518                    366.672  
## percentage_high_education  
##                    -5.306
lm_pm2.5_asthma_adjusted_best <- lm(asthma_hosp_rate_per_10k~ percent_high_income + percent_non_hisp_black + percent_hisp_black + percentage_high_education, data=merge)
summary(lm_pm2.5_asthma_adjusted_best)%>%
  tab_model()
  asthma hosp rate per 10 k
Predictors Estimates CI p
(Intercept) 1.36 0.06 – 2.67 0.041
percent high income 4.65 0.32 – 8.99 0.036
percent non hisp black 13.52 5.77 – 21.26 0.001
percent hisp black 366.67 314.84 – 418.50 <0.001
percentage high education -5.31 -9.55 – -1.06 0.015
Observations 59
R2 / R2 adjusted 0.935 / 0.930
Linear regression pm2.5 concentration and cardiovascular disease hospitalization
lm_pm2.5_cardio <- lm(cardio_hosp_rate_per_10k ~ annual_pm2.5 , data=merge)
summary(lm_pm2.5_cardio)
## 
## Call:
## lm(formula = cardio_hosp_rate_per_10k ~ annual_pm2.5, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -69.983 -11.516   4.183  18.558  49.049 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  144.4652    22.3939   6.451 2.14e-08 ***
## annual_pm2.5   0.9545     3.1662   0.301    0.764    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.07 on 60 degrees of freedom
## Multiple R-squared:  0.001512,   Adjusted R-squared:  -0.01513 
## F-statistic: 0.09088 on 1 and 60 DF,  p-value: 0.7641
lm_pm2.5_cardio_adjusted <-lm(cardio_hosp_rate_per_10k ~annual_pm2.5 + median_age + percent_high_income + percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percent_hisp_black + percentage_high_education + percent_male, data=merge)
summary(lm_pm2.5_cardio_adjusted)
## 
## Call:
## lm(formula = cardio_hosp_rate_per_10k ~ annual_pm2.5 + median_age + 
##     percent_high_income + percent_non_hisp_white + percent_non_hisp_black + 
##     percent_hisp_white + percent_hisp_black + percentage_high_education + 
##     percent_male, data = merge)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -75.098  -8.011   3.779  12.581  33.035 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                183.1675   172.5353   1.062   0.2933  
## annual_pm2.5                 7.2209     3.6862   1.959   0.0555 .
## median_age                   1.9151     0.9331   2.052   0.0452 *
## percent_high_income        -44.4537    69.3825  -0.641   0.5245  
## percent_non_hisp_white     106.5411    81.2693   1.311   0.1956  
## percent_non_hisp_black     241.3852   156.1433   1.546   0.1282  
## percent_hisp_white         537.8787   299.2115   1.798   0.0780 .
## percent_hisp_black        -640.7688   718.5628  -0.892   0.3766  
## percentage_high_education -131.5332    58.7527  -2.239   0.0295 *
## percent_male              -432.0543   266.2154  -1.623   0.1106  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.75 on 52 degrees of freedom
## Multiple R-squared:  0.3407, Adjusted R-squared:  0.2266 
## F-statistic: 2.986 on 9 and 52 DF,  p-value: 0.00609
stepwise
step(lm_pm2.5_cardio_adjusted, direction = 'both')
## Start:  AIC=396.56
## cardio_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_high_income + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percent_hisp_black + percentage_high_education + percent_male
## 
##                             Df Sum of Sq   RSS    AIC
## - percent_high_income        1    212.52 27134 395.05
## - percent_hisp_black         1    411.68 27333 395.50
## <none>                                   26921 396.56
## - percent_non_hisp_white     1    889.76 27811 396.58
## - percent_non_hisp_black     1   1237.27 28159 397.35
## - percent_male               1   1363.65 28285 397.62
## - percent_hisp_white         1   1673.03 28594 398.30
## - annual_pm2.5               1   1986.63 28908 398.97
## - median_age                 1   2180.60 29102 399.39
## - percentage_high_education  1   2594.81 29516 400.26
## 
## Step:  AIC=395.05
## cardio_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_non_hisp_white + 
##     percent_non_hisp_black + percent_hisp_white + percent_hisp_black + 
##     percentage_high_education + percent_male
## 
##                             Df Sum of Sq   RSS    AIC
## - percent_hisp_black         1     274.1 27408 393.67
## - percent_non_hisp_white     1     757.5 27891 394.75
## <none>                                   27134 395.05
## - percent_non_hisp_black     1    1120.6 28254 395.56
## - percent_male               1    1463.3 28597 396.30
## - percent_hisp_white         1    1568.1 28702 396.53
## + percent_high_income        1     212.5 26921 396.56
## - annual_pm2.5               1    1799.8 28934 397.03
## - median_age                 1    1976.3 29110 397.41
## - percentage_high_education  1    7845.9 34980 408.79
## 
## Step:  AIC=393.67
## cardio_hosp_rate_per_10k ~ annual_pm2.5 + median_age + percent_non_hisp_white + 
##     percent_non_hisp_black + percent_hisp_white + percentage_high_education + 
##     percent_male
## 
##                             Df Sum of Sq   RSS    AIC
## <none>                                   27408 393.67
## - percent_non_hisp_black     1    1078.3 28486 394.06
## - percent_male               1    1265.2 28673 394.47
## - percent_non_hisp_white     1    1285.6 28693 394.51
## + percent_hisp_black         1     274.1 27134 395.05
## - percent_hisp_white         1    1647.9 29056 395.29
## + percent_high_income        1      75.0 27333 395.50
## - annual_pm2.5               1    1959.6 29367 395.95
## - median_age                 1    2053.8 29462 396.15
## - percentage_high_education  1    7775.0 35183 407.15
## 
## Call:
## lm(formula = cardio_hosp_rate_per_10k ~ annual_pm2.5 + median_age + 
##     percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + 
##     percentage_high_education + percent_male, data = merge)
## 
## Coefficients:
##               (Intercept)               annual_pm2.5  
##                   157.519                      6.968  
##                median_age     percent_non_hisp_white  
##                     1.725                    117.257  
##    percent_non_hisp_black         percent_hisp_white  
##                   222.986                    439.557  
## percentage_high_education               percent_male  
##                  -148.614                   -405.527
lm_pm2.5_cardio_adjusted_best <-lm(cardio_hosp_rate_per_10k ~annual_pm2.5 + median_age +percent_non_hisp_white + percent_non_hisp_black + percent_hisp_white + percentage_high_education + percent_male, data=merge)
summary (lm_pm2.5_cardio_adjusted_best)%>%
  tab_model()
  cardio hosp rate per 10 k
Predictors Estimates CI p
(Intercept) 157.52 -155.87 – 470.91 0.318
annual pm2 5 6.97 -0.14 – 14.08 0.055
median age 1.73 0.01 – 3.44 0.049
percent non hisp white 117.26 -30.46 – 264.97 0.117
percent non hisp black 222.99 -83.73 – 529.70 0.151
percent hisp white 439.56 -49.52 – 928.63 0.077
percentage high education -148.61 -224.74 – -72.49 <0.001
percent male -405.53 -920.48 – 109.43 0.120
Observations 62
R2 / R2 adjusted 0.329 / 0.242

All model together

tab_model(lm_pm2.5_birthweight_adjusted_best,lm_pm2.5_premature_adjusted_best,lm_pm2.5_cancer_adjusted_best,lm_pm2.5_asthma_adjusted_best,lm_pm2.5_cardio_adjusted )
  percent lowbirthweight premature percentage cancer mortality per 100
k
asthma hosp rate per 10 k cardio hosp rate per 10 k
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 11.53 7.18 – 15.88 <0.001 4.88 0.96 – 8.80 0.016 -1.85 -517.25 – 513.56 0.994 1.36 0.06 – 2.67 0.041 183.17 -163.05 – 529.38 0.293
median age -0.11 -0.21 – -0.02 0.024 0.08 -0.01 – 0.17 0.067 13.14 9.82 – 16.46 <0.001 1.92 0.04 – 3.79 0.045
percent non hisp black 8.05 1.34 – 14.76 0.019 8.03 2.63 – 13.42 0.004 536.28 22.68 – 1049.88 0.041 13.52 5.77 – 21.26 0.001 241.39 -71.94 – 554.71 0.128
annual pm2 5 13.27 0.99 – 25.54 0.035 7.22 -0.18 – 14.62 0.055
percent high income -213.49 -373.22 – -53.75 0.010 4.65 0.32 – 8.99 0.036 -44.45 -183.68 – 94.77 0.525
percent non hisp white 464.03 200.92 – 727.14 0.001 106.54 -56.54 – 269.62 0.196
percent hisp white 1023.82 103.51 – 1944.13 0.030 537.88 -62.53 – 1138.29 0.078
percent hisp black -1787.43 -4172.76 – 597.90 0.139 366.67 314.84 – 418.50 <0.001 -640.77 -2082.67 – 801.13 0.377
percent male -574.15 -1409.17 – 260.87 0.174 -432.05 -966.25 – 102.15 0.111
percentage high education -5.31 -9.55 – -1.06 0.015 -131.53 -249.43 – -13.64 0.029
Observations 62 61 61 59 62
R2 / R2 adjusted 0.274 / 0.249 0.135 / 0.105 0.809 / 0.780 0.935 / 0.930 0.341 / 0.227